\documentclass[11pt]{article}

    \usepackage[breakable]{tcolorbox}
    \usepackage{parskip} % Stop auto-indenting (to mimic markdown behaviour)
    \usepackage{hyperref}
    \usepackage[authoryear]{natbib}
    \usepackage{iftex}
    \ifPDFTeX
    	\usepackage[T1]{fontenc}
    	\usepackage{mathpazo}
    \else
    	\usepackage{fontspec}
    \fi
    % Bibliography
\usepackage[authoryear]{natbib}
\bibliographystyle{chicago}
%\setcitestyle{authoryear,open={(},close={)}}
\usepackage{bibentry}

    % Basic figure setup, for now with no caption control since it's done
    % automatically by Pandoc (which extracts ![](path) syntax from Markdown).
    \usepackage{graphicx}
    % Maintain compatibility with old templates. Remove in nbconvert 6.0
    \let\Oldincludegraphics\includegraphics
    % Ensure that by default, figures have no caption (until we provide a
    % proper Figure object with a Caption API and a way to capture that
    % in the conversion process - todo).
    \usepackage{caption}
    \DeclareCaptionFormat{nocaption}{}
    \captionsetup{format=nocaption,aboveskip=0pt,belowskip=0pt}

    \usepackage{float}
    \floatplacement{figure}{H} % forces figures to be placed at the correct location
    \usepackage{xcolor} % Allow colors to be defined
    \usepackage{enumerate} % Needed for markdown enumerations to work
    \usepackage{geometry} % Used to adjust the document margins
    \usepackage{amsmath} % Equations
    \usepackage{amssymb} % Equations
    \usepackage{textcomp} % defines textquotesingle
    % Hack from http://tex.stackexchange.com/a/47451/13684:
    % At \begin{document}, define \PYZsq (the escape macro used for a single
    % quote in Pygmentized code) to expand to \textquotesingle from textcomp.
    % Since the hook runs at \begin{document}, it replaces whatever definition
    % of \PYZsq the preamble made.
    \AtBeginDocument{%
        \def\PYZsq{\textquotesingle}% Upright quotes in Pygmentized code
    }
    \usepackage{upquote} % Upright quotes for verbatim code
    \usepackage{eurosym} % defines \euro
    \usepackage[mathletters]{ucs} % Extended unicode (utf-8) support
    \usepackage{fancyvrb} % verbatim replacement that allows latex
    \usepackage{grffile} % extends the file name processing of package graphics 
                         % to support a larger range
    % Compatibility shim for old grffile releases.
    % \@ifpackagelater selects the first (empty) branch when the loaded grffile
    % is dated 2019/11/01 or later, so nothing is changed for new versions.
    % Otherwise \Gread@@xetex is redefined: if a file named "<base>.bb"
    % (<base> = \Gin@base) exists, it is read with \Gread@eps; if not, the
    % argument is handed to \Gread@@xetex@aux.
    % NOTE(review): \Gread@@xetex, \Gread@eps and \Gread@@xetex@aux are
    % presumably internals of the graphics XeTeX driver -- confirm before
    % touching.
    \makeatletter % fix for old versions of grffile with XeLaTeX
    \@ifpackagelater{grffile}{2019/11/01}
    {
      % Do nothing on new versions
    }
    {
      \def\Gread@@xetex#1{%
        \IfFileExists{"\Gin@base".bb}%
        {\Gread@eps{\Gin@base.bb}}%
        {\Gread@@xetex@aux#1}%
      }
    }
    \makeatother
    \usepackage[Export]{adjustbox} % Used to constrain images to a maximum size
    \adjustboxset{max size={0.9\linewidth}{0.9\paperheight}}

    % The hyperref package gives us a pdf with properly built
    % internal navigation ('pdf bookmarks' for the table of contents,
    % internal cross-reference links, web links for URLs, etc.)
    \usepackage{hyperref}
    % The default LaTeX title has an obnoxious amount of whitespace. By default,
    % titling removes some of it. It also provides customization options.
    \usepackage{titling}
    \usepackage{longtable} % longtable support required by pandoc >1.10
    \usepackage{booktabs}  % table support for pandoc > 1.12.2
    \usepackage[inline]{enumitem} % IRkernel/repr support (it uses the enumerate* environment)
    \usepackage[normalem]{ulem} % ulem is needed to support strikethroughs (\sout)
                                % normalem makes italics be italics, not underlines
    \usepackage{mathrsfs}
    

    
    % Link colours consumed by \hypersetup (URL, internal link, citation)
    \definecolor{urlcolor}{rgb}{0.000,0.145,0.698}
    \definecolor{linkcolor}{rgb}{0.360,0.540,0.660}
    \definecolor{citecolor}{rgb}{0.210,0.540,0.660}

    % ANSI palette, regular variants
    \definecolor{ansi-black}{HTML}{3E424D}
    \definecolor{ansi-red}{HTML}{E75C58}
    \definecolor{ansi-green}{HTML}{00A250}
    \definecolor{ansi-yellow}{HTML}{DDB62B}
    \definecolor{ansi-blue}{HTML}{208FFB}
    \definecolor{ansi-magenta}{HTML}{D160C4}
    \definecolor{ansi-cyan}{HTML}{60C6C8}
    \definecolor{ansi-white}{HTML}{C5C1B4}
    % ANSI palette, intense variants
    \definecolor{ansi-black-intense}{HTML}{282C36}
    \definecolor{ansi-red-intense}{HTML}{B22B31}
    \definecolor{ansi-green-intense}{HTML}{007427}
    \definecolor{ansi-yellow-intense}{HTML}{B27D12}
    \definecolor{ansi-blue-intense}{HTML}{0065CA}
    \definecolor{ansi-magenta-intense}{HTML}{A03196}
    \definecolor{ansi-cyan-intense}{HTML}{258F8F}
    \definecolor{ansi-white-intense}{HTML}{A1A6B2}
    % ANSI palette, inverse-video defaults (white on black)
    \definecolor{ansi-default-inverse-fg}{HTML}{FFFFFF}
    \definecolor{ansi-default-inverse-bg}{HTML}{000000}

    % Background colour shared by error outputs
    \definecolor{outerrorbackground}{HTML}{FFDFDF}

    % Helper definitions that pandoc-generated snippets rely on
    % (extracted from the output of `pandoc -s`).
    % Shaded: wrapper environment around highlighted code; adds nothing itself.
    \newenvironment{Shaded}{}{}
    % Highlighting: fancyvrb Verbatim in which \, { and } keep their command
    % meaning, so that the *Tok macros can be used inside it.
    % Add ',fontsize=\small' for more characters per line
    \DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}}
    % \tightlist: zero \itemsep and \parskip inside a list (only defined if
    % no earlier definition exists).
    \providecommand{\tightlist}{%
      \setlength{\itemsep}{0pt}%
      \setlength{\parskip}{0pt}%
    }
    % Syntax-highlighting macros used inside the Highlighting environment.
    % Each \<Kind>Tok{text} typesets its argument, for most kinds wrapped in
    % \textcolor with a fixed rgb triple and, for some, additionally in
    % \textbf and/or \textit. \RegionMarkerTok, \NormalTok, \ImportTok,
    % \BuiltInTok and \ExtensionTok print their argument unchanged.
    \newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{\textbf{{#1}}}}
    \newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.56,0.13,0.00}{{#1}}}
    \newcommand{\DecValTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
    \newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
    \newcommand{\FloatTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
    \newcommand{\CharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
    \newcommand{\StringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
    \newcommand{\CommentTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textit{{#1}}}}
    \newcommand{\OtherTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{{#1}}}
    \newcommand{\AlertTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}}
    \newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.02,0.16,0.49}{{#1}}}
    \newcommand{\RegionMarkerTok}[1]{{#1}}
    \newcommand{\ErrorTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}}
    \newcommand{\NormalTok}[1]{{#1}}
    
    % Additional commands for more recent versions of Pandoc
    \newcommand{\ConstantTok}[1]{\textcolor[rgb]{0.53,0.00,0.00}{{#1}}}
    \newcommand{\SpecialCharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
    \newcommand{\VerbatimStringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
    \newcommand{\SpecialStringTok}[1]{\textcolor[rgb]{0.73,0.40,0.53}{{#1}}}
    \newcommand{\ImportTok}[1]{{#1}}
    \newcommand{\DocumentationTok}[1]{\textcolor[rgb]{0.73,0.13,0.13}{\textit{{#1}}}}
    \newcommand{\AnnotationTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}}
    \newcommand{\CommentVarTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}}
    \newcommand{\VariableTok}[1]{\textcolor[rgb]{0.10,0.09,0.49}{{#1}}}
    \newcommand{\ControlFlowTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{\textbf{{#1}}}}
    \newcommand{\OperatorTok}[1]{\textcolor[rgb]{0.40,0.40,0.40}{{#1}}}
    \newcommand{\BuiltInTok}[1]{{#1}}
    \newcommand{\ExtensionTok}[1]{{#1}}
    \newcommand{\PreprocessorTok}[1]{\textcolor[rgb]{0.74,0.48,0.00}{{#1}}}
    \newcommand{\AttributeTok}[1]{\textcolor[rgb]{0.49,0.56,0.16}{{#1}}}
    \newcommand{\InformationTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}}
    \newcommand{\WarningTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}}
    
    
    % Define a nice break command that doesn't care if a line doesn't already
    % exist.
    % \br inserts \hspace*{\fill} (so there is always material on the line)
    % and then ends the line with the starred form \\*.
    \def\br{\hspace*{\fill} \\* }
    % Math Jax compatibility definitions
    % \gt and \lt expand to the literal characters > and <.
    \def\gt{>}
    \def\lt{<}
    % Save the original logo macros, then redefine \TeX and \LaTeX to typeset
    % the saved logos inside \textrm.
    \let\Oldtex\TeX
    \let\Oldlatex\LaTeX
    \renewcommand{\TeX}{\textrm{\Oldtex}}
    \renewcommand{\LaTeX}{\textrm{\Oldlatex}}
    % Document parameters
    % Document title
    \title{Leave-Out Estimation of Variance Components in Two-Way Fixed Effects Models Using MATLAB}
    
    
    
% Pygments definitions
%
% \PY{<names>}{<text>} typesets <text> with the styles of the token classes
% listed in <names> (separated by `+'):
%   \PY@reset  sets the six style hooks (\PY@it, \PY@bf, \PY@ul, \PY@tc,
%              \PY@bc, \PY@ff) to \relax;
%   \PY@toks   walks the `+'-separated list, calling \PY@tok on each entry
%              until it meets the \relax terminator appended by \PY;
%   \PY@tok    runs \PY@tok@<name>, which installs that class's hooks;
%   \PY@do     applies the hooks to the text, nested as
%              bc{ tc{ ul{ it{ bf{ ff{text} } } } } }.
\makeatletter
\def\PY@reset{\let\PY@it=\relax \let\PY@bf=\relax%
    \let\PY@ul=\relax \let\PY@tc=\relax%
    \let\PY@bc=\relax \let\PY@ff=\relax}
\def\PY@tok#1{\csname PY@tok@#1\endcsname}
\def\PY@toks#1+{\ifx\relax#1\empty\else%
    \PY@tok{#1}\expandafter\PY@toks\fi}
\def\PY@do#1{\PY@bc{\PY@tc{\PY@ul{%
    \PY@it{\PY@bf{\PY@ff{#1}}}}}}}
\def\PY#1#2{\PY@reset\PY@toks#1+\relax+\PY@do{#2}}

% Per-token style hooks. Each \PY@tok@<name> is run by \PY@tok and installs
% the hooks that \PY@do later applies: \PY@tc (text colour, via \textcolor),
% \PY@bf (\textbf), \PY@it (\textit) and, for `err' only, \PY@bc (a red
% \fcolorbox frame on a white background with \fboxsep set to 0pt).
% NOTE(review): the <name> suffixes are presumably Pygments' short token-class
% names -- confirm against the Pygments token documentation.
\expandafter\def\csname PY@tok@w\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.73,0.73}{##1}}}
\expandafter\def\csname PY@tok@c\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PY@tok@cp\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.74,0.48,0.00}{##1}}}
\expandafter\def\csname PY@tok@k\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@kp\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@kt\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.69,0.00,0.25}{##1}}}
\expandafter\def\csname PY@tok@o\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@ow\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.67,0.13,1.00}{##1}}}
\expandafter\def\csname PY@tok@nb\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@nf\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\expandafter\def\csname PY@tok@nc\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\expandafter\def\csname PY@tok@nn\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\expandafter\def\csname PY@tok@ne\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.82,0.25,0.23}{##1}}}
\expandafter\def\csname PY@tok@nv\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PY@tok@no\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.53,0.00,0.00}{##1}}}
\expandafter\def\csname PY@tok@nl\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.63,0.63,0.00}{##1}}}
\expandafter\def\csname PY@tok@ni\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.60,0.60,0.60}{##1}}}
\expandafter\def\csname PY@tok@na\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.49,0.56,0.16}{##1}}}
\expandafter\def\csname PY@tok@nt\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@nd\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.67,0.13,1.00}{##1}}}
\expandafter\def\csname PY@tok@s\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@sd\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@si\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.73,0.40,0.53}{##1}}}
\expandafter\def\csname PY@tok@se\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.73,0.40,0.13}{##1}}}
\expandafter\def\csname PY@tok@sr\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.40,0.53}{##1}}}
\expandafter\def\csname PY@tok@ss\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PY@tok@sx\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@m\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@gh\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,0.50}{##1}}}
\expandafter\def\csname PY@tok@gu\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.50,0.00,0.50}{##1}}}
\expandafter\def\csname PY@tok@gd\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.63,0.00,0.00}{##1}}}
\expandafter\def\csname PY@tok@gi\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.63,0.00}{##1}}}
\expandafter\def\csname PY@tok@gr\endcsname{\def\PY@tc##1{\textcolor[rgb]{1.00,0.00,0.00}{##1}}}
\expandafter\def\csname PY@tok@ge\endcsname{\let\PY@it=\textit}
\expandafter\def\csname PY@tok@gs\endcsname{\let\PY@bf=\textbf}
\expandafter\def\csname PY@tok@gp\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,0.50}{##1}}}
\expandafter\def\csname PY@tok@go\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.53,0.53,0.53}{##1}}}
\expandafter\def\csname PY@tok@gt\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.27,0.87}{##1}}}
\expandafter\def\csname PY@tok@err\endcsname{\def\PY@bc##1{\setlength{\fboxsep}{0pt}\fcolorbox[rgb]{1.00,0.00,0.00}{1,1,1}{\strut ##1}}}
\expandafter\def\csname PY@tok@kc\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@kd\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@kn\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@kr\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@bp\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@fm\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\expandafter\def\csname PY@tok@vc\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PY@tok@vg\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PY@tok@vi\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PY@tok@vm\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PY@tok@sa\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@sb\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@sc\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@dl\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@s2\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@sh\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@s1\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@mb\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@mf\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@mh\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@mi\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@il\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@mo\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@ch\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PY@tok@cm\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PY@tok@cpf\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PY@tok@c1\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PY@tok@cs\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}

% Escape macros for characters that are special to TeX. Each \PYZ<xx>
% expands to \char`\<c>, i.e. it prints the character <c> from the current
% font: bs=\  us=_  ob={  cb=}  ca=^  am=&  lt=<  gt=>  sh=#  pc=%  dl=$
% hy=-  sq='  dq="  ti=~
% Note: \PYZsq is defined again at \begin{document} by the \AtBeginDocument
% hook earlier in this preamble, which makes it expand to \textquotesingle.
\def\PYZbs{\char`\\}
\def\PYZus{\char`\_}
\def\PYZob{\char`\{}
\def\PYZcb{\char`\}}
\def\PYZca{\char`\^}
\def\PYZam{\char`\&}
\def\PYZlt{\char`\<}
\def\PYZgt{\char`\>}
\def\PYZsh{\char`\#}
\def\PYZpc{\char`\%}
\def\PYZdl{\char`\$}
\def\PYZhy{\char`\-}
\def\PYZsq{\char`\'}
\def\PYZdq{\char`\"}
\def\PYZti{\char`\~}
% for compatibility with earlier versions
% (\PYZat, \PYZlb and \PYZrb expand to the plain characters @, [ and ])
\def\PYZat{@}
\def\PYZlb{[}
\def\PYZrb{]}
\makeatother


    % For linebreaks inside Verbatim environment from package fancyvrb. 
    \makeatletter
        % Box registers holding the pre-typeset continuation symbol and
        % visible-space glyph; both are filled by the redefined \Verbatim.
        \newbox\Wrappedcontinuationbox 
        \newbox\Wrappedvisiblespacebox 
        % Glyph shown at the end of a line that was broken at a space
        % (red \textvisiblespace).
        \newcommand*\Wrappedvisiblespace {\textcolor{red}{\textvisiblespace}} 
        % Glyph marking a continuation line: a tiny red hook arrow set with
        % \llap, so it extends to the left of the current position.
        \newcommand*\Wrappedcontinuationsymbol {\textcolor{red}{\llap{\tiny$\m@th\hookrightarrow$}}} 
        % Indentation applied to continuation lines.
        \newcommand*\Wrappedcontinuationindent {3ex } 
        % Material inserted at the start of a continuation line: the indent
        % as a kern, followed by a copy of the continuation-symbol box.
        \newcommand*\Wrappedafterbreak {\kern\Wrappedcontinuationindent\copy\Wrappedcontinuationbox} 
        % Take advantage of the already applied Pygments mark-up to insert
        % potential linebreaks for TeX processing.
        %        {, <, #, %, $, ' and ": go to next line.
        %        _, }, ^, &, >, - and ~: stay at end of broken line.
        % Use of \textquotesingle for straight quote.
        %
        % Each escape macro becomes a \discretionary with arguments
        % {before break}{after break}{no break}; \Wrappedafterbreak supplies
        % the continuation indent and symbol on the new line.
        % The break-aware definitions are installed under both the \PYGZ..
        % names and the \PYZ.. names; the latter are the escape macros defined
        % by the Pygments block of this preamble (\PYZus, \PYZob, ...), so
        % code marked up with them can break at special characters as well.
        % \Wrappedbreaksatspecials is meant to be called inside a group
        % (the redefined \Verbatim does so), which keeps the change local.
        \newcommand*\Wrappedbreaksatspecials {%
            \def\PYGZus{\discretionary{\char`\_}{\Wrappedafterbreak}{\char`\_}}%
            \def\PYGZob{\discretionary{}{\Wrappedafterbreak\char`\{}{\char`\{}}%
            \def\PYGZcb{\discretionary{\char`\}}{\Wrappedafterbreak}{\char`\}}}%
            \def\PYGZca{\discretionary{\char`\^}{\Wrappedafterbreak}{\char`\^}}%
            \def\PYGZam{\discretionary{\char`\&}{\Wrappedafterbreak}{\char`\&}}%
            \def\PYGZlt{\discretionary{}{\Wrappedafterbreak\char`\<}{\char`\<}}%
            \def\PYGZgt{\discretionary{\char`\>}{\Wrappedafterbreak}{\char`\>}}%
            \def\PYGZsh{\discretionary{}{\Wrappedafterbreak\char`\#}{\char`\#}}%
            \def\PYGZpc{\discretionary{}{\Wrappedafterbreak\char`\%}{\char`\%}}%
            \def\PYGZdl{\discretionary{}{\Wrappedafterbreak\char`\$}{\char`\$}}%
            \def\PYGZhy{\discretionary{\char`\-}{\Wrappedafterbreak}{\char`\-}}%
            \def\PYGZsq{\discretionary{}{\Wrappedafterbreak\textquotesingle}{\textquotesingle}}%
            \def\PYGZdq{\discretionary{}{\Wrappedafterbreak\char`\"}{\char`\"}}%
            \def\PYGZti{\discretionary{\char`\~}{\Wrappedafterbreak}{\char`\~}}%
            % Same break behaviour for the \PYZ.. escape names.
            \let\PYZus\PYGZus \let\PYZob\PYGZob \let\PYZcb\PYGZcb
            \let\PYZca\PYGZca \let\PYZam\PYGZam \let\PYZlt\PYGZlt
            \let\PYZgt\PYGZgt \let\PYZsh\PYGZsh \let\PYZpc\PYGZpc
            \let\PYZdl\PYGZdl \let\PYZhy\PYGZhy \let\PYZsq\PYGZsq
            \let\PYZdq\PYGZdq \let\PYZti\PYGZti
        }
        % Some characters . , ; : ? ! / are not pygmentized.
        % This macro makes them "active" and they will insert potential linebreaks
        %
        % How it works, per character c: \lccode`\~ is set to c, so \lowercase
        % turns the active `~' in "\def~" into an active c; that active c is
        % defined as a \discretionary which prints c (in an \hbox) and, if the
        % line is broken there, starts the next line with \Wrappedafterbreak.
        % Afterwards the seven characters are given catcode \active, and the
        % \lccode of `~' is set back to `~' itself.
        \newcommand*\Wrappedbreaksatpunct {% 
            \lccode`\~`\.\lowercase{\def~}{\discretionary{\hbox{\char`\.}}{\Wrappedafterbreak}{\hbox{\char`\.}}}% 
            \lccode`\~`\,\lowercase{\def~}{\discretionary{\hbox{\char`\,}}{\Wrappedafterbreak}{\hbox{\char`\,}}}% 
            \lccode`\~`\;\lowercase{\def~}{\discretionary{\hbox{\char`\;}}{\Wrappedafterbreak}{\hbox{\char`\;}}}% 
            \lccode`\~`\:\lowercase{\def~}{\discretionary{\hbox{\char`\:}}{\Wrappedafterbreak}{\hbox{\char`\:}}}% 
            \lccode`\~`\?\lowercase{\def~}{\discretionary{\hbox{\char`\?}}{\Wrappedafterbreak}{\hbox{\char`\?}}}% 
            \lccode`\~`\!\lowercase{\def~}{\discretionary{\hbox{\char`\!}}{\Wrappedafterbreak}{\hbox{\char`\!}}}% 
            \lccode`\~`\/\lowercase{\def~}{\discretionary{\hbox{\char`\/}}{\Wrappedafterbreak}{\hbox{\char`\/}}}% 
            \catcode`\.\active
            \catcode`\,\active 
            \catcode`\;\active
            \catcode`\:\active
            \catcode`\?\active
            \catcode`\!\active
            \catcode`\/\active 
            \lccode`\~`\~ 	
        }
    \makeatother

    % Line-wrapping version of fancyvrb's Verbatim environment.
    % The original start command is kept as \OriginalVerbatim; the replacement
    %   - typesets the continuation symbol and visible-space glyph into their
    %     boxes (in the current verbatim font for the latter),
    %   - formats every line as a ragged-right \vtop of width \linewidth with
    %     all hyphenation penalties set to zero, so long lines wrap,
    %   - turns spaces and special/punctuation characters into break points,
    %   - and finally starts the original environment.
    % #1 (optional): fancyvrb key=value options, forwarded to the original
    %     environment. The default is empty so that a plain \begin{Verbatim}
    %     forwards no stray key (a non-empty default would be passed to
    %     fancyvrb as if it were an option name).
    \let\OriginalVerbatim=\Verbatim
    \makeatletter
    \renewcommand{\Verbatim}[1][]{%
        %\parskip\z@skip
        \sbox\Wrappedcontinuationbox {\Wrappedcontinuationsymbol}%
        \sbox\Wrappedvisiblespacebox {\FV@SetupFont\Wrappedvisiblespace}%
        \def\FancyVerbFormatLine ##1{\hsize\linewidth
            \vtop{\raggedright\hyphenpenalty\z@\exhyphenpenalty\z@
                \doublehyphendemerits\z@\finalhyphendemerits\z@
                \strut ##1\strut}%
        }%
        % If the linebreak is at a space, the latter will be displayed as visible
        % space at end of first line, and a continuation symbol starts next line.
        % Stretch/shrink are however usually zero for typewriter font.
        \def\FV@Space {%
            \nobreak\hskip\z@ plus\fontdimen3\font minus\fontdimen4\font
            \discretionary{\copy\Wrappedvisiblespacebox}{\Wrappedafterbreak}
            {\kern\fontdimen2\font}%
        }%
        
        % Allow breaks at special characters using \PYG... macros.
        \Wrappedbreaksatspecials
        % Breaks at punctuation characters . , ; : ? ! and / need catcode=\active
        \OriginalVerbatim[#1,codes*=\Wrappedbreaksatpunct]%
    }
    \makeatother

    % Colours copied from the notebook interface ("Exact colors from NB")
    % -- cell frame and fill
    \definecolor{cellbackground}{HTML}{F7F7F7}
    \definecolor{cellborder}{HTML}{CFCFCF}
    % -- input / output prompt text
    \definecolor{incolor}{HTML}{303F9F}
    \definecolor{outcolor}{HTML}{D84315}
    
    % prompt
    % \boxspacing: a horizontal kern of \kvtcb@left@rule followed by one of
    % \kvtcb@boxsep.
    % NOTE(review): both lengths are presumably tcolorbox internals (left rule
    % width and box separation) -- confirm against the tcolorbox version in use.
    \makeatletter
    \newcommand{\boxspacing}{\kern\kvtcb@left@rule\kern\kvtcb@boxsep}
    \makeatother
    % \prompt{#1}{#2}{#3}{#4}: prints "[#3]:" followed by a 3pt gap and #4,
    % in colour #2 and typewriter type, placed with \llap so that it extends
    % to the left of the current position; then moves up by one \baselineskip.
    % #1 is accepted but not used in the body.
    % The line ends after the opening brace and after the inner group are not
    % commented out, so the expansion starts and ends with a space token.
    \newcommand{\prompt}[4]{
        {\ttfamily\llap{{\color{#2}[#3]:\hspace{3pt}#4}}\vspace{-\baselineskip}}
    }
    

    
    % Relax line-breaking tolerances for the whole document so that
    % hard-to-break entities do not overflow the line
    \sloppy
    % hyperref configuration: coloured link text using the link colours
    % defined earlier in this preamble; long URLs may break across lines
    \hypersetup{
      colorlinks=true,
      linkcolor=linkcolor,
      citecolor=citecolor,
      urlcolor=urlcolor,
      breaklinks=true,
      }
    % Page layout: one-inch margins on all four sides
    \geometry{verbose,margin=1in}
    
    

\begin{document}
    
        \maketitle
    
    

    
    This notebook describes the MATLAB package that implements the leave-out
correction of \cite*{kline2020leave} (henceforth KSS)  for
two-way fixed effects models.

    \tableofcontents
    \newpage

    \hypertarget{introduction}{%
\section{Introduction}\label{introduction}}

Economists often study settings where units possess two or more group
memberships, some of which can change over time. A prominent example
comes from \cite{abowd1999high} (henceforth AKM) who
proposed a panel model of log wage determination that is additive in
worker and firm fixed effects.

This so-called ``two-way'' fixed effects or ``AKM'' model takes the
form

\begin{equation}
    y_{gt} =  \alpha_{{g}} + \psi_{j({g},t)} + w_{gt}'\delta +  \varepsilon_{gt}  \qquad({g}=1,\dots,N, \ t=1,\dots,T_{g} \ge 2),
\end{equation}

where the function
\(j(\cdot ,\cdot ):\lbrace 1,\dots ,N\rbrace \times \lbrace 1,\dots ,\max_g T_g \rbrace \to \lbrace 1,\dots ,J\rbrace\)
assigns worker \(g\) in year \(t\) to one of \(J\) firms.
Here \(\alpha_g\) is a person effect, \(\psi_{j(g,t)}\) is a firm
effect, \(w_{gt}\) is a time-varying covariate, and \(\varepsilon_{gt}\)
is a mean-independent time-varying error.

We can rewrite the original AKM model as:

\begin{equation}
y_i =x_i^{\prime } \beta +\varepsilon_i \qquad i=1,\dots,n,
\end{equation}

where \(i\) indexes a particular person-year observation \((g,t)\),
\(x_i\) is a vector that collects all the worker and firm dummies as well
as the time-varying covariates \(w_{gt}\) so that
\(\beta =(\alpha ,\psi ,\delta )'\) is a \(k\times 1\) vector that
collects all the worker and firm fixed effects along with \(\delta\).

Interest in AKM models often centers on understanding how much of the
variability in log wages is attributable to firms. It is common to
summarize the firm contribution to wage inequality using the following
two parameters:

\begin{equation}
\sigma_{\psi }^2 =\frac{1}{n}\sum_{g=1}^N \sum_{t=1}^{T_g } {\left(\psi_{j\left(g,t\right)} -\bar{\psi} \right)}^2 \qquad \text{and }\sigma_{\alpha ,\psi } =\frac{1}{n}\sum_{g=1}^N \sum_{t=1}^{T_g } \left(\psi_{j\left(g,t\right)} -\bar{\psi} \right)\alpha_g           
\end{equation}

where
\(\bar{\psi} =\frac{1}{n}\sum_{g=1}^N \sum_{t=1}^{T_g } \psi_{j(g,t)}\).
The variance component \(\sigma_{\psi }^2\) measures the contribution of
firm wage variability to inequality, while the covariance component
\(\sigma_{\alpha ,\psi }\) measures the additional contribution of
systematic sorting of high-wage workers to high-wage firms.

The function \texttt{leave\_out\_KSS} provides unbiased estimates of
\(\sigma_{\psi }^2\) and \(\sigma_{\alpha ,\psi }\) as well as an
estimate of
\(\sigma_{\alpha }^2 =\frac{1}{n}\sum_{g=1}^N \sum_{t=1}^{T_g } {\left(\alpha_g -\bar{\alpha} \right)}^2\)
using the leave-out bias-correction approach proposed by KSS.

\hypertarget{the-kss-correction}{%
\subsection{The KSS Correction}\label{the-kss-correction}}

We now provide some general intuition about the KSS leave-out
methodology. A more formal discussion can be found in KSS.
\hypertarget{the-plug-in-estimator}{%
\subsubsection{The Plug-in Estimator}\label{the-plug-in-estimator}}

Suppose that the researcher is interested in the variance of firm effects,
\(\sigma_{\psi }^2\). To simplify the exposition, we normalize the firm
effects so that their firm-size-weighted mean is equal to zero,
i.e., \(\bar{\psi}=0\), and rewrite \(\sigma_{\psi }^2\) as

\begin{equation}
\sigma_{\psi }^2 =\sum_{j=1}^J s_{j}\psi^{2}_{j}
\end{equation}

where $s_{j}$ gives the employment share of firm $j$, i.e., $s_{j}=\dfrac{1}{n}\sum_{g=1}^{N}\sum_{t=1}^{T_{g}}\mathbf{1}\{j(g,t)=j\}$.

It is customary to report ``plug-in'' estimates of a given variance
component using the corresponding OLS estimate. For instance, the
plug-in estimate of the variance of firm effects \(\sigma_{\psi }^2\) is
given by

\begin{equation}
\tilde{\sigma}_{\psi}^2=\sum_{j=1}^J s_{j}\hat{\psi}^{2}_{j}
\end{equation}

where \(\hat{\psi}_{j}\) is the estimate of \(\psi_{j}\) obtained by
fitting equation (1) via OLS.

\hypertarget{the-bias-in-the-plug-in-estimator}{%
\subsubsection{The Bias in the Plug-in
Estimator}\label{the-bias-in-the-plug-in-estimator}}

The estimated firm effect, \(\hat{\psi}_{j}\), represents a noisy
estimate of the true firm effect, \(\psi_{j}\). The presence of noise in
\(\hat{\psi}_{j}\) is not an issue when one is interested in
\(\psi_{j}\) as the OLS estimator \(\hat{\psi}_{j}\) is assumed to be
unbiased in this context, i.e., \(E[\hat{\psi}_{j}]=\psi_{j}\).

However, the estimation error in \(\hat{\psi}_{j}\) is going to lead to
biases if one is interested in estimating \(\psi_{j}^{2}\) using its ``plug-in'' analogue
\(\hat{\psi}_{j}^{2}\) since

\begin{equation}
E[\hat{\psi}_{j}^{2}]=E[(\hat{\psi}_{j}-\psi_{j}+\psi_{j})^2]=\psi_{j}^{2}+\underbrace{\mathbb{V}[\hat{\psi}_{j}]}_{\text{Bias}},
\end{equation}

where \(\mathbb{V}[\hat{\psi}_{j}]\) is the squared standard error of
\(\hat{\psi}_{j}\). Intuitively, when we take the square of
\(\hat{\psi}_{j}\) we are not only squaring its signal, \(\psi_{j}\),
but also the estimation error in each \(\hat{\psi}_{j}\). The latter is
going to introduce a bias when estimating \(\psi^{2}_{j}\).

The same logic applies when analyzing the bias of the plug-in estimator
of the variance of firm effects since

\begin{equation}
E[\tilde{\sigma}_{\psi}^2]=\sigma^{2}_{\psi}+\underbrace{\sum_{j=1}^{J}s_{j}{\mathbb{V}[\hat{\psi}_{j}]}}_{\text{Bias}}
\end{equation}

\hypertarget{the-problem-with-standard-standard-errors-in-high-dimensional-models}{%
\subsubsection{The Problem with ``Standard'' Standard Errors in
High-Dimensional
Models}\label{the-problem-with-standard-standard-errors-in-high-dimensional-models}}

The above formula shows that the bias of the plug-in estimator of the
variance of firm effects is

\begin{equation}
E[\tilde{\sigma}_{\psi}^2]-\sigma^{2}_{\psi}=\sum_{j=1}^{J}s_{j}{\mathbb{V}[\hat{\psi}_{j}]}.
\end{equation}

Therefore, all that is required for a bias correction is an estimate of
the (squared) standard error of each firm effect,
\({\mathbb{V}[\hat{\psi}_{j}]}\). Similarly, if we are interested in the
variance of person effects, then we would need the (squared) standard
error of each of the person effects, \({\mathbb{V}[\hat{\alpha}_{g}]}\). If we
are interested in the covariance of worker and firm effects, then we
would need the covariances in sampling errors between each
\(\hat{\alpha}_{g}\) and \(\hat{\psi}_{j(g,t)}\).

The above discussion highlights that an estimate of the sampling
variability of the OLS coefficient vector
\(\hat{\beta}=(\hat{\alpha},\hat{\psi},\hat{\delta})'\) is required in
order to derive an unbiased estimate of the variance components of the
AKM model displayed in equation (2).

Recall that the sampling variability in \(\hat{\beta}\), assuming
independence across observations, is given by

\begin{equation}
{\mathbb{{V}}}[\hat{\beta}]=\left(\sum_{i=1}^{n}x_{i}x_{i}'\right)^{-1}\sum_{i=1}^{n}{\sigma}^{2}_{i}x_{i}x_{i}'\left(\sum_{i=1}^{n}x_{i}x_{i}'\right)^{-1},
\end{equation}

where \(\sigma^{2}_{i}=\mathbb{V}[\varepsilon_{i}]\).

One might be tempted to provide an estimate of \(\mathbb{V}[\hat{\beta}]\)
using heteroskedasticity \emph{consistent} (``HC'') or robust standard
errors. Standard \cite{white1980heteroskedasticity} HC standard errors are calculated using a plug-in
estimate of \(\sigma^{2}_{i}\) based on

\begin{equation}
\tilde{\sigma}^{2}_{i}=(y_{i}-x_{i}'\hat{\beta})^2,
\end{equation}

where the HC-based estimate of \(\mathbb{V}[\hat{\beta}]\) is given by

\begin{equation}
\tilde{\mathbb{{V}}}[\hat{\beta}]=\left(\sum_{i=1}^{n}x_{i}x_{i}'\right)^{-1}\sum_{i=1}^{n}\tilde{{\sigma}}^{2}_{i}x_{i}x_{i}'\left(\sum_{i=1}^{n}x_{i}x_{i}'\right)^{-1}.
\end{equation}

However, HC standard errors based on \(\tilde{\sigma}^{2}_{i}\) are downward biased \citep{mackinnon1985some}. From an asymptotic perspective, HC standard errors are
inconsistent in any high-dimensional model where the number of
parameters grows in proportion to the sample size \citep{cattaneo2017inference}. Such ``many regressor'' asymptotics are natural in the worker-firm fixed effects model, as we often have fewer than 5 worker moves on average per firm.

\hypertarget{the-leave-out-correction}{%
\subsubsection{The Leave-Out
Correction}\label{the-leave-out-correction}}

KSS provides a heteroskedasticity-\emph{unbiased} (HU) estimate of the
standard error of any coefficient obtained from a
linear regression model.

The KSS HU standard error estimate is based on a leave-out estimate of
\(\sigma^{2}_{i}\):

\begin{equation}
\hat{\sigma}^{2}_{i}=y_{i}(y_{i}-x_{i}'\hat{\beta}_{-i}),
\end{equation}

where \(\hat{\beta}_{-i}\) is the OLS estimate of \(\beta\) from
equation (2) when observation \(i\) is left out.

KSS then replaces \(\sigma^{2}_{i}\) in \(\mathbb{V}[\hat{\beta}]\) with
its unbiased estimate \(\hat{\sigma}^{2}_{i}\) to derive an HU estimate
of \(\mathbb{V}[\hat{\beta}]\):

\begin{equation}
\hat{\mathbb{{V}}}[\hat{\beta}]=\left(\sum_{i=1}^{n}x_{i}x_{i}'\right)^{-1}\sum_{i=1}^{n}\hat{{\sigma}}^{2}_{i}x_{i}x_{i}'\left(\sum_{i=1}^{n}x_{i}x_{i}'\right)^{-1}.
\end{equation}

Going back to the example of the variance of the firm effects, we can
extract from \(\hat{\mathbb{V}}[\hat{\beta}]\) the corresponding squared
standard error of each firm effect, \(\hat{\mathbb{V}}[\hat{\psi}_{j}]\).
We can then use it to bias-correct the corresponding estimate of
the variance of the firm effects as follows:

\begin{equation}
\hat{\sigma}^{2}_{\psi}=\tilde{\sigma}^{2}_{\psi}-\sum_{j=1}^{J}s_{j}\hat{\mathbb{V}}[\hat{\psi}_{j}].
\end{equation}

The MATLAB function \texttt{leave\_out\_KSS} is going to print the bias-corrected variance of firm effects, \(\hat{\sigma}^{2}_{\psi}\), the bias-corrected covariance of worker and firm effects and variance
of person effects.  \texttt{leave\_out\_KSS} also provides the correct standard errors --- based on \(\hat{\mathbb{V}}[\hat{\beta}]\)  as opposed to \(\tilde{\mathbb{V}}[\hat{\beta}]\) --- when one regresses the firm effects on some observable characteristics; see Section \ref{sec:lincom}.
\hypertarget{computing-the-kss-correction}{%
\section{Computing the KSS
Correction}\label{computing-the-kss-correction}}

We now demonstrate how one can implement the KSS correction in two-way
models using MATLAB and the function \texttt{leave\_out\_KSS}. We
continue to work with a simple example based on an AKM model. In what
follows, we use the words ``workers'' and ``firms'' when describing the
procedure but the function \texttt{leave\_out\_KSS}
can in fact be applied to any two-way fixed effects model (e.g.,~patients and doctors,
students and teachers, strata and treatment arms).

\hypertarget{setup}{%
\subsection{Setup}\label{setup}}

We begin with some auxiliary lines of code that define the relevant
paths, call the \href{http://www.cs.cmu.edu/~jkoutis/cmg.html}{CMG package} developed by Yiannis Koutis, and set up the
parallel environment within MATLAB.

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{1}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{c}{\PYZpc{}Setup Paths and Install CMG}
\PY{n}{clc}
\PY{l+s}{clear}
\PY{n}{cd} \PY{l+s}{\PYZsq{}/Users/raffaelesaggio/Dropbox/LeaveOutTwoWay\PYZsq{}}
\PY{n}{path}\PY{p}{(}\PY{n}{path}\PY{p}{,}\PY{l+s}{\PYZsq{}}\PY{l+s}{codes\PYZsq{}}\PY{p}{)}\PY{p}{;} \PY{c}{\PYZpc{}this contains the main LeaveOut Routines.}
\PY{n}{path}\PY{p}{(}\PY{n}{path}\PY{p}{,}\PY{l+s}{\PYZsq{}}\PY{l+s}{CMG\PYZsq{}}\PY{p}{)}\PY{p}{;} \PY{c}{\PYZpc{} CMG package http://www.cs.cmu.edu/\PYZti{}jkoutis/cmg.html}
\PY{p}{[}\PY{n}{result}\PY{p}{,}\PY{n}{output}\PY{p}{]} \PY{p}{=} \PY{n}{evalc}\PY{p}{(}\PY{l+s}{\PYZsq{}}\PY{l+s}{installCMG(1)\PYZsq{}}\PY{p}{)}\PY{p}{;} \PY{c}{\PYZpc{}installs CMG routine (silently)}
\PY{n}{delete}\PY{p}{(}\PY{n}{gcp}\PY{p}{(}\PY{l+s}{\PYZdq{}nocreate\PYZdq{}}\PY{p}{)}\PY{p}{)} \PY{c}{\PYZpc{}clear parallel envir.}
\PY{n}{c} \PY{p}{=} \PY{n}{parcluster}\PY{p}{(}\PY{l+s}{\PYZsq{}}\PY{l+s}{local\PYZsq{}}\PY{p}{)}\PY{p}{;}  \PY{c}{\PYZpc{}tell me \PYZsh{} of available cores}
\PY{n}{nw} \PY{p}{=} \PY{n}{c}\PY{p}{.}\PY{n}{NumWorkers}\PY{p}{;} \PY{c}{\PYZpc{}tell me \PYZsh{} of available cores}
\PY{n}{pool}\PY{p}{=}\PY{n}{parpool}\PY{p}{(}\PY{n}{nw}\PY{p}{,}\PY{l+s}{\PYZsq{}}\PY{l+s}{IdleTimeout\PYZsq{}}\PY{p}{,} \PY{n}{Inf}\PY{p}{)}\PY{p}{;} \PY{c}{\PYZpc{}all cores will be assigned to Matlab}
\end{Verbatim}
\end{tcolorbox}

    \begin{Verbatim}[commandchars=\\\{\}]
Starting parallel pool (parpool) using the 'local' profile {\ldots}
Connected to the parallel pool (number of workers: 6).
    \end{Verbatim}

    \hypertarget{importing-the-data}{%
\subsection{Importing the Data}\label{importing-the-data}}

The GitHub Repo contains a matched employer-employee testing dataset in which
we observe the identity of the worker, the identity of the firm
employing a given worker, the year in which the match is observed
(either 1999 or 2001), and the associated log wage.

\emph{Important!} The original data must be sorted by individual
identifiers (id). For instance, one can see that the testing data is
sorted by individual identifiers (and by year, using
\texttt{xtset\ id\ year} in Stata).

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{2}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{c}{\PYZpc{}\PYZpc{} Import Data}
\PY{n}{namesrc}\PY{p}{=}\PY{l+s}{\PYZsq{}}\PY{l+s}{data/test.csv\PYZsq{}}\PY{p}{;} \PY{c}{\PYZpc{}path to original testing data}
\PY{n}{data}\PY{p}{=}\PY{n}{importdata}\PY{p}{(}\PY{n}{namesrc}\PY{p}{)}\PY{p}{;} \PY{c}{\PYZpc{}import data}
\PY{n}{id}\PY{p}{=}\PY{n}{data}\PY{p}{(}\PY{p}{:}\PY{p}{,}\PY{l+m+mi}{1}\PY{p}{)}\PY{p}{;} \PY{c}{\PYZpc{}worker identifiers}
\PY{n}{firmid}\PY{p}{=}\PY{n}{data}\PY{p}{(}\PY{p}{:}\PY{p}{,}\PY{l+m+mi}{2}\PY{p}{)}\PY{p}{;} \PY{c}{\PYZpc{}firm identifiers}
\PY{n}{y}\PY{p}{=}\PY{n}{data}\PY{p}{(}\PY{p}{:}\PY{p}{,}\PY{l+m+mi}{4}\PY{p}{)}\PY{p}{;} \PY{c}{\PYZpc{} outcome variable}
\PY{n}{clear} \PY{l+s}{data}
\end{Verbatim}
\end{tcolorbox}

    \hypertarget{calling-the-main-function}{%
\subsection{Calling the Main Function}\label{calling-the-main-function}}

The function \texttt{leave\_out\_KSS} relies on three mandatory inputs:
\texttt{(y,id,firmid)}. We can obtain an unbiased variance decomposition
of the associated AKM model by simply calling

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{3}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{c}{\PYZpc{}\PYZpc{} Run KSS!}
\PY{p}{[}\PY{n}{sigma2\PYZus{}psi}\PY{p}{,}\PY{n}{sigma\PYZus{}psi\PYZus{}alpha}\PY{p}{,}\PY{n}{sigma2\PYZus{}alpha}\PY{p}{]}  \PY{p}{=} \PY{n}{leave\PYZus{}out\PYZus{}KSS}\PY{p}{(}\PY{n}{y}\PY{p}{,}\PY{n}{id}\PY{p}{,}\PY{n}{firmid}\PY{p}{)}\PY{p}{;}
\end{Verbatim}
\end{tcolorbox}

    \begin{Verbatim}[commandchars=\\\{\}]
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
Running KSS Correction with the following options
Leave Out Strategy: Leave match out
Algorithm for Computation of Statistical Leverages: JLA with 200 simulations.
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
SECTION 1
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
Info on the leave one out connected set:
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
mean wage: 4.7636
variance of wage: 0.1245
\# of Movers: 6414
\# of Firms: 1684
\# of Person Year Observations: 56044
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
SECTION 2
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
Calculating the statistical leverages of the AKM model{\ldots}
Running JLA Algorithm{\ldots}
Done!
Elapsed time is 5.602229 seconds.
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
SECTION 3
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
PLUG-IN ESTIMATES (BIASED)
Variance of Firm Effects: 0.019821
Covariance of Firm, Person Effects: -0.0039091
Variance of Person Effects: 0.10354
Correlation of Firm, Person Effects: -0.08629
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
BIAS CORRECTED ESTIMATES
Variance of Firm Effects: 0.010218
Covariance of Firm and Person Effects: 0.0047795
Variance of Person Effects: 0.085005
Correlation of Firm, Person Effects: 0.16217
    \end{Verbatim}

    \hypertarget{interpreting-the-output}{%
\section{Interpreting the Output}\label{interpreting-the-output}}

The code starts by printing its two key inputs: the algorithm used to compute the statistical leverages (exact vs.\ JLA) --- we explain this distinction in Section~\ref{sec:JLA} --- and the level at which the leave-out correction is carried out (observation vs.\ match) --- we explain this in more detail in Section~\ref{sec:leave_out_level}.

The output printed by  \texttt{leave\_out\_KSS}  is composed of
three sections.

\textbf{Section 1}: Here we provide info on the leave-out connected set.
This is the largest connected set of firms that remains connected after
any worker from the associated graph is removed, see Lemma 1 and the
Computational Appendix of KSS for details.
The code provides some summary statistics (e.g. number of movers, number of
firms, mean and variance of the outcome, etc.) of the leave-out
connected set.

\textbf{Section 2}: After printing the summary statistics, the code
computes the statistical leverages of the design, denoted by \(P_{ii}\).
Computation of \(\{P_{ii}\}_{i=1}^{n}\) represents the main
computational bottleneck of the routine.

\textbf{Section 3}: The code then enters its third, and final stage,
where the main results are printed. The code starts by reporting the ---
biased --- estimates of the variance components that result from the
``plug-in'' approach of treating OLS estimates as measured without
error. Finally, the code prints the bias-corrected variance of firm
effects, the covariance of worker and firm effects, the variance of
person effects, and the correlation of firm and person effects.

    \hypertarget{what-does-the-code-save}{%
\section{What Does the Code Save?}\label{what-does-the-code-save}}

\texttt{leave\_out\_KSS} saves three scalars: the variance of firm
effects (\texttt{sigma2\_psi} in {[}4{]}), the covariance of worker and
firm effects (\texttt{sigma\_psi\_alpha}), and the variance of person
effects (\texttt{sigma2\_alpha}).

\texttt{leave\_out\_KSS} also saves one .csv file to disk. This .csv
contains information on the leave-out connected set. This file has 4
columns. The first column reports the outcome variable, the second and third
columns report the worker and the firm identifiers (as originally inputted by
the user), and the fourth column reports the statistical leverages of the
regression design. If the code is reporting a leave-out correction at the
match-level, the .csv will be collapsed at the match level. By default,
the .csv file is going to be saved in the main directory under the name
\texttt{leave\_out\_estimates}. The user can specify an alternative path
using the option \texttt{filename} when calling
\texttt{leave\_out\_KSS}.

    \hypertarget{scaling-to-large-datasets}{%
\section{Scaling to Large Datasets}\label{scaling-to-large-datasets}}
\label{sec:JLA}
\texttt{leave\_out\_KSS} can be used on extremely large datasets. The
code uses a variant of the random projection method, known as the
Johnson--Lindenstrauss approximation (JLA) algorithm in KSS
for its connection to the work of \cite{johnson1984extensions}; see
also \cite{achlioptas2001database}. We now discuss briefly the main computational
bottleneck of the procedure and the JLA algorithm.

\hypertarget{computational-bottleneck}{%
\subsection{Computational Bottleneck}\label{computational-bottleneck}}

Recall from the discussion in Section 1 that the KSS leave-out bias
correction procedure relies on leave-out estimates of \(\sigma^{2}_{i}\),

\begin{equation}
\hat{\sigma}^{2}_{i}=y_{i}(y_{i}-x_{i}'\hat{\beta}_{-i}),
\end{equation}

where \(\hat{\beta}_{-i}\) is the OLS estimate of \(\beta\) from the AKM
model in equation (2) after leaving observation \(i\) out.

Clearly, re-estimating \(\hat{\beta}_{-i}\) \(n\) times, each time leaving out a
different observation \(i\), is computationally infeasible.
Fortunately, one can rewrite \(\hat{\sigma}^{2}_{i}\) as

\begin{equation}
\hat{\sigma}^{2}_{i}=y_{i}\frac{(y_{i}-x_{i}'\hat{\beta})}{1-P_{ii}},
\end{equation}

where \(P_{ii}\) measures the influence or leverage of observation
\(i\), i.e.,~\(P_{ii} =x_i^{\prime } S_{xx}^{-1} x_i\) with \(S_{xx}=\sum_{\ell=1}^{n}x_{\ell}x_{\ell}'\). The above expression
highlights that all that is needed for computation of
\(\hat{\sigma}^{2}_{i}\) are the \(n\) statistical leverages
\(\{P_{ii}\}_{i=1}^{n}\). However, exact computation of \(P_{ii}\) may remain prohibitive when \(n\) is on the order of tens of millions or
higher.

\hypertarget{the-jla-algorithm}{%
\subsection{Approximating the Statistical Leverages}\label{the-jla-algorithm}}

The JLA algorithm introduced by KSS provides a stochastic approximation
to \(\{P_{ii}\}_{i=1}^{n}\) using the random projection ideas developed
by \cite{johnson1984extensions}. We refer the reader to the
\href{https://www.dropbox.com/s/ycvls8pbtxewj06/DataComputationAppendix.pdf?dl=1}{Computational Appendix of KSS} for further details.

The number of simulations underlying the JLA algorithm is governed by
the input \texttt{simulations\_JLA} (which is denoted by \(p\) in the
computational appendix). Intuitively, more simulations imply a
higher accuracy --- but also higher computation time --- when estimating
\(\lbrace P_{ii} ,B_{ii} \rbrace_{i=1}^n\).

\textbf{Note:} The user might want to prespecify a random-number
generator seed to ensure replicability when calling the function
\texttt{leave\_out\_KSS}.

We now demonstrate the performance of the code on a large dataset.

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{4}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{c}{\PYZpc{}\PYZpc{} Running KSS on a large dataset          }
\PY{n}{websave}\PY{p}{(}\PY{l+s}{\PYZsq{}}\PY{l+s}{large\PYZus{}fake.csv\PYZsq{}}\PY{p}{,} \PY{l+s}{\PYZsq{}}\PY{l+s}{https://www.dropbox.com/s/ny5tef29ij7ran2/large\PYZus{}fake\PYZus{}data.csv?dl=1\PYZsq{}}\PY{p}{)}\PY{p}{;} \PY{c}{\PYZpc{}downloads and saves to disk a fake, large matched employer employee data}
\PY{n}{namesrc}\PY{p}{=}\PY{l+s}{\PYZsq{}}\PY{l+s}{large\PYZus{}fake.csv\PYZsq{}}\PY{p}{;} \PY{c}{\PYZpc{}path to the large data}
\PY{n}{data}\PY{p}{=}\PY{n}{importdata}\PY{p}{(}\PY{n}{namesrc}\PY{p}{)}\PY{p}{;} \PY{c}{\PYZpc{}import data }
\PY{n}{id}\PY{p}{=}\PY{n}{data}\PY{p}{(}\PY{p}{:}\PY{p}{,}\PY{l+m+mi}{1}\PY{p}{)}\PY{p}{;} \PY{c}{\PYZpc{}worker identifiers}
\PY{n}{firmid}\PY{p}{=}\PY{n}{data}\PY{p}{(}\PY{p}{:}\PY{p}{,}\PY{l+m+mi}{2}\PY{p}{)}\PY{p}{;} \PY{c}{\PYZpc{}firm identifiers}
\PY{n}{y}\PY{p}{=}\PY{n}{data}\PY{p}{(}\PY{p}{:}\PY{p}{,}\PY{l+m+mi}{4}\PY{p}{)}\PY{p}{;} \PY{c}{\PYZpc{} outcome variable}
\PY{n}{clear} \PY{l+s}{data}
\PY{n}{delete}\PY{p}{(}\PY{l+s}{\PYZsq{}}\PY{l+s}{large\PYZus{}fake.csv\PYZsq{}}\PY{p}{)}\PY{p}{;} \PY{c}{\PYZpc{}delete original .csv data from disk}

\PY{c}{\PYZpc{}Run Leave Out Correction (50 simulations) }
\PY{n}{type\PYZus{}of\PYZus{}algorithm}\PY{p}{=}\PY{l+s}{\PYZsq{}}\PY{l+s}{JLA\PYZsq{}}\PY{p}{;} \PY{c}{\PYZpc{}run random projection algorithm}
\PY{n}{simulations\PYZus{}JLA}\PY{p}{=}\PY{l+m+mi}{50}\PY{p}{;}
\PY{p}{[}\PY{n}{sigma2\PYZus{}psi}\PY{p}{,}\PY{n}{sigma\PYZus{}psi\PYZus{}alpha}\PY{p}{,}\PY{n}{sigma2\PYZus{}alpha}\PY{p}{]}  \PY{p}{=} \PY{n}{leave\PYZus{}out\PYZus{}KSS}\PY{p}{(}\PY{n}{y}\PY{p}{,}\PY{n}{id}\PY{p}{,}\PY{n}{firmid}\PY{p}{,}\PY{p}{[}\PY{p}{]}\PY{p}{,}\PY{p}{[}\PY{p}{]}\PY{p}{,}\PY{n}{type\PYZus{}of\PYZus{}algorithm}\PY{p}{,}\PY{n}{simulations\PYZus{}JLA}\PY{p}{)}\PY{p}{;}
\end{Verbatim}
\end{tcolorbox}

    \begin{Verbatim}[commandchars=\\\{\}]
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
Running KSS Correction with the following options
Leave Out Strategy: Leave match out
Algorithm for Computation of Statistical Leverages: JLA with 50 simulations.
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
SECTION 1
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
Info on the leave one out connected set:
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
mean wage: 4.7304
variance of wage: 0.16248
\# of Movers: 916632
\# of Firms: 165360
\# of Person Year Observations: 13860616
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
SECTION 2
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
Calculating the statistical leverages of the AKM model{\ldots}
Running JLA Algorithm{\ldots}
Done!
Elapsed time is 251.168051 seconds.
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
SECTION 3
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
PLUG-IN ESTIMATES (BIASED)
Variance of Firm Effects: 0.039448
Covariance of Firm, Person Effects: 0.0084313
Variance of Person Effects: 0.080329
Correlation of Firm, Person Effects: 0.14978
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
BIAS CORRECTED ESTIMATES
Variance of Firm Effects: 0.030371
Covariance of Firm and Person Effects: 0.014639
Variance of Person Effects: 0.048803
Correlation of Firm, Person Effects: 0.38024
    \end{Verbatim}

    We can see from the output that the leave-out connected set has almost
14 million person-year observations. The code is able to complete in
around 4 minutes (on a 2020 Macbook Pro with 6 cores and 16GB of RAM).

The computational appendix in KSS shows that the JLA algorithm can cut
computation time by a factor of 100 while introducing an approximation
error of roughly \(10^{-4}\).

The current code uses an improved estimator of both \(P_{ii}\) and
\(M_{ii}=1-P_{ii}\), which are both guaranteed to lie in \([0,1]\). These
improved estimators are then combined to derive an asymptotically unbiased JLA
estimator of a given variance component provided that
\(\frac{n}{p^{4}}=o(1)\); see \href{https://www.dropbox.com/s/i28yvzae2tnp2tl/improved_JLA.pdf?dl=1}{this document} for further details.

We can check the stability of the estimates for different values of
\texttt{simulations\_JLA}. For instance, if we double
\texttt{simulations\_JLA} from 50 to 100 and run the code again on the
same data:

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{5}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{c}{\PYZpc{}\PYZpc{} Compute estimates while doubling number of simulations}
\PY{n}{simulations\PYZus{}JLA}\PY{p}{=}\PY{l+m+mi}{100}\PY{p}{;}
\PY{p}{[}\PY{n}{sigma2\PYZus{}psi}\PY{p}{,}\PY{n}{sigma\PYZus{}psi\PYZus{}alpha}\PY{p}{,}\PY{n}{sigma2\PYZus{}alpha}\PY{p}{]}  \PY{p}{=} \PY{n}{leave\PYZus{}out\PYZus{}KSS}\PY{p}{(}\PY{n}{y}\PY{p}{,}\PY{n}{id}\PY{p}{,}\PY{n}{firmid}\PY{p}{,}\PY{p}{[}\PY{p}{]}\PY{p}{,}\PY{p}{[}\PY{p}{]}\PY{p}{,}\PY{n}{type\PYZus{}of\PYZus{}algorithm}\PY{p}{,}\PY{n}{simulations\PYZus{}JLA}\PY{p}{)}\PY{p}{;} \PY{c}{\PYZpc{}check stability of variance components}
\end{Verbatim}
\end{tcolorbox}

    \begin{Verbatim}[commandchars=\\\{\}]
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
Running KSS Correction with the following options
Leave Out Strategy: Leave match out
Algorithm for Computation of Statistical Leverages: JLA with 100 simulations.
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
SECTION 1
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
Info on the leave one out connected set:
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
mean wage: 4.7304
variance of wage: 0.16248
\# of Movers: 916632
\# of Firms: 165360
\# of Person Year Observations: 13860616
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
SECTION 2
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
Calculating the statistical leverages of the AKM model{\ldots}
Running JLA Algorithm{\ldots}
Done!
Elapsed time is 458.093692 seconds.
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
SECTION 3
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
PLUG-IN ESTIMATES (BIASED)
Variance of Firm Effects: 0.039448
Covariance of Firm, Person Effects: 0.0084313
Variance of Person Effects: 0.080329
Correlation of Firm, Person Effects: 0.14978
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
BIAS CORRECTED ESTIMATES
Variance of Firm Effects: 0.030382
Covariance of Firm and Person Effects: 0.014598
Variance of Person Effects: 0.048738
Correlation of Firm, Person Effects: 0.37937
    \end{Verbatim}

    We obtain virtually the same variance components as when
\texttt{simulations\_JLA}=50 while significantly increasing the
computational time! If the user does not specify a value for
\texttt{simulations\_JLA}, the code defaults to
\texttt{simulations\_JLA}=200.

We conclude this section by noting that the user can also calculate an
exact version of \(\lbrace P_{ii} \rbrace_{i=1}^n\). This can be done by
setting the option \texttt{type\_of\_algorithm} to \texttt{exact}.

\textbf{Warning!} Calling the option \texttt{exact} in large datasets
can be very time-consuming! We now load again the original, smaller,
testing data and then compare the exact and JLA-based estimates of the
variance components:

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{6}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{c}{\PYZpc{}\PYZpc{} Compare Exact vs. JLA Estimates}
\PY{n}{namesrc}\PY{p}{=}\PY{l+s}{\PYZsq{}}\PY{l+s}{data/test.csv\PYZsq{}}\PY{p}{;} \PY{c}{\PYZpc{}path to original testing data}
\PY{n}{data}\PY{p}{=}\PY{n}{importdata}\PY{p}{(}\PY{n}{namesrc}\PY{p}{)}\PY{p}{;} \PY{c}{\PYZpc{}import data}
\PY{n}{id}\PY{p}{=}\PY{n}{data}\PY{p}{(}\PY{p}{:}\PY{p}{,}\PY{l+m+mi}{1}\PY{p}{)}\PY{p}{;} \PY{c}{\PYZpc{}worker identifiers}
\PY{n}{firmid}\PY{p}{=}\PY{n}{data}\PY{p}{(}\PY{p}{:}\PY{p}{,}\PY{l+m+mi}{2}\PY{p}{)}\PY{p}{;} \PY{c}{\PYZpc{}firm identifiers}
\PY{n}{y}\PY{p}{=}\PY{n}{data}\PY{p}{(}\PY{p}{:}\PY{p}{,}\PY{l+m+mi}{4}\PY{p}{)}\PY{p}{;} \PY{c}{\PYZpc{} outcome variable}
\PY{n}{clear} \PY{l+s}{data}

\PY{c}{\PYZpc{}Run Leave Out Correction with exact}
\PY{n}{type\PYZus{}of\PYZus{}algorithm}\PY{p}{=}\PY{l+s}{\PYZsq{}}\PY{l+s}{exact\PYZsq{}}\PY{p}{;} \PY{c}{\PYZpc{}compute the statistical leverages exactly (no random projection)}
\PY{p}{[}\PY{n}{sigma2\PYZus{}psi}\PY{p}{,}\PY{n}{sigma\PYZus{}psi\PYZus{}alpha}\PY{p}{,}\PY{n}{sigma2\PYZus{}alpha}\PY{p}{]}  \PY{p}{=} \PY{n}{leave\PYZus{}out\PYZus{}KSS}\PY{p}{(}\PY{n}{y}\PY{p}{,}\PY{n}{id}\PY{p}{,}\PY{n}{firmid}\PY{p}{,}\PY{p}{[}\PY{p}{]}\PY{p}{,}\PY{p}{[}\PY{p}{]}\PY{p}{,}\PY{n}{type\PYZus{}of\PYZus{}algorithm}\PY{p}{)}\PY{p}{;}

\PY{c}{\PYZpc{}Run Leave Out Correction with JLA}
\PY{n}{simulations\PYZus{}JLA}\PY{p}{=}\PY{l+m+mi}{100}\PY{p}{;}
\PY{n}{type\PYZus{}of\PYZus{}algorithm}\PY{p}{=}\PY{l+s}{\PYZsq{}}\PY{l+s}{JLA\PYZsq{}}\PY{p}{;} \PY{c}{\PYZpc{}run random projection algorithm;}
\PY{p}{[}\PY{n}{sigma2\PYZus{}psi}\PY{p}{,}\PY{n}{sigma\PYZus{}psi\PYZus{}alpha}\PY{p}{,}\PY{n}{sigma2\PYZus{}alpha}\PY{p}{]}  \PY{p}{=} \PY{n}{leave\PYZus{}out\PYZus{}KSS}\PY{p}{(}\PY{n}{y}\PY{p}{,}\PY{n}{id}\PY{p}{,}\PY{n}{firmid}\PY{p}{,}\PY{p}{[}\PY{p}{]}\PY{p}{,}\PY{p}{[}\PY{p}{]}\PY{p}{,}\PY{n}{type\PYZus{}of\PYZus{}algorithm}\PY{p}{,}\PY{n}{simulations\PYZus{}JLA}\PY{p}{)}\PY{p}{;}
\PY{c}{\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}\PYZpc{}}
\end{Verbatim}
\end{tcolorbox}

    \begin{Verbatim}[commandchars=\\\{\}]
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
Running KSS Correction with the following options
Leave Out Strategy: Leave match out
Algorithm for Computation of Statistical Leverages: Exact
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
SECTION 1
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
Info on the leave one out connected set:
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
mean wage: 4.7636
variance of wage: 0.1245
\# of Movers: 6414
\# of Firms: 1684
\# of Person Year Observations: 56044
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
SECTION 2
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
Calculating the statistical leverages of the AKM model{\ldots}
Running Exact Algorithm{\ldots}
Done!
Elapsed time is 162.242224 seconds.
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
SECTION 3
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
PLUG-IN ESTIMATES (BIASED)
Variance of Firm Effects: 0.019821
Covariance of Firm, Person Effects: -0.0039091
Variance of Person Effects: 0.10354
Correlation of Firm, Person Effects: -0.08629
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
BIAS CORRECTED ESTIMATES
Variance of Firm Effects: 0.010289
Covariance of Firm and Person Effects: 0.0046293
Variance of Person Effects: 0.085204
Correlation of Firm, Person Effects: 0.15635
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
Running KSS Correction with the following options
Leave Out Strategy: Leave match out
Algorithm for Computation of Statistical Leverages: JLA with 100 simulations.
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
SECTION 1
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
Info on the leave one out connected set:
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
mean wage: 4.7636
variance of wage: 0.1245
\# of Movers: 6414
\# of Firms: 1684
\# of Person Year Observations: 56044
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
SECTION 2
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
Calculating the statistical leverages of the AKM model{\ldots}
Running JLA Algorithm{\ldots}
Done!
Elapsed time is 2.434287 seconds.
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
SECTION 3
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
PLUG-IN ESTIMATES (BIASED)
Variance of Firm Effects: 0.019821
Covariance of Firm, Person Effects: -0.0039091
Variance of Person Effects: 0.10354
Correlation of Firm, Person Effects: -0.08629
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
BIAS CORRECTED ESTIMATES
Variance of Firm Effects: 0.01044
Covariance of Firm and Person Effects: 0.0044957
Variance of Person Effects: 0.085326
Correlation of Firm, Person Effects: 0.15063
    \end{Verbatim}

    The variance components estimated using JLA are extremely close to the
\texttt{exact} estimates but only take a fraction of the time to compute. If
the input data has more than 10,000 observations, the code defaults to using the
JLA algorithm unless the user specifies \texttt{type\_of\_algorithm} as
\texttt{exact}.

    \hypertarget{adding-controls}{%
\section{Adding Controls}\label{adding-controls}}

We have demonstrated the functioning of \texttt{leave\_out\_KSS} using a
simple AKM model with no controls (\(w_{gt}=0\)). It is easy to add a
matrix of controls to the routine. Suppose for instance that we want to
add year fixed effects to the original AKM model. This can be done as
follows.

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{7}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{c}{\PYZpc{}\PYZpc{} How to add controls}
\PY{n}{namesrc}\PY{p}{=}\PY{l+s}{\PYZsq{}}\PY{l+s}{data/test.csv\PYZsq{}}\PY{p}{;} \PY{c}{\PYZpc{}path to original testing data}
\PY{n}{data}\PY{p}{=}\PY{n}{importdata}\PY{p}{(}\PY{n}{namesrc}\PY{p}{)}\PY{p}{;} \PY{c}{\PYZpc{}import data}
\PY{n}{id}\PY{p}{=}\PY{n}{data}\PY{p}{(}\PY{p}{:}\PY{p}{,}\PY{l+m+mi}{1}\PY{p}{)}\PY{p}{;} \PY{c}{\PYZpc{}worker identifiers}
\PY{n}{firmid}\PY{p}{=}\PY{n}{data}\PY{p}{(}\PY{p}{:}\PY{p}{,}\PY{l+m+mi}{2}\PY{p}{)}\PY{p}{;} \PY{c}{\PYZpc{}firm identifiers}
\PY{n}{year}\PY{p}{=}\PY{n}{data}\PY{p}{(}\PY{p}{:}\PY{p}{,}\PY{l+m+mi}{3}\PY{p}{)}\PY{p}{;} \PY{c}{\PYZpc{}year identifier}
\PY{n}{y}\PY{p}{=}\PY{n}{data}\PY{p}{(}\PY{p}{:}\PY{p}{,}\PY{l+m+mi}{4}\PY{p}{)}\PY{p}{;} \PY{c}{\PYZpc{} outcome variable}
\PY{n}{clear} \PY{l+s}{data}

\PY{c}{\PYZpc{}Specify year fixed effects as controls}
\PY{p}{[}\PY{o}{\PYZti{}}\PY{p}{,}\PY{o}{\PYZti{}}\PY{p}{,}\PY{n}{controls}\PY{p}{]} \PY{p}{=} \PY{n}{unique}\PY{p}{(}\PY{n}{year}\PY{p}{)}\PY{p}{;}
\PY{n}{controls} 	   \PY{p}{=} \PY{n}{sparse}\PY{p}{(}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{:}\PY{n+nb}{size}\PY{p}{(}\PY{n}{y}\PY{p}{,}\PY{l+m+mi}{1}\PY{p}{)}\PY{p}{)}\PY{o}{\PYZsq{}}\PY{p}{,}\PY{n}{controls}\PY{o}{\PYZsq{}}\PY{p}{,}\PY{l+m+mi}{1}\PY{p}{,}\PY{n+nb}{size}\PY{p}{(}\PY{n}{y}\PY{p}{,}\PY{l+m+mi}{1}\PY{p}{)}\PY{p}{,}\PY{n}{max}\PY{p}{(}\PY{n}{controls}\PY{p}{)}\PY{p}{)}\PY{p}{;}
\PY{n}{controls}       \PY{p}{=} \PY{n}{controls}\PY{p}{(}\PY{p}{:}\PY{p}{,}\PY{l+m+mi}{1}\PY{p}{:}\PY{k}{end}\PY{o}{\PYZhy{}}\PY{l+m+mi}{1}\PY{p}{)}\PY{p}{;} \PY{c}{\PYZpc{}to avoid collinearity issues, omit last year fixed effects.}

\PY{c}{\PYZpc{}Call KSS with matrix of controls}
\PY{p}{[}\PY{n}{sigma2\PYZus{}psi}\PY{p}{,}\PY{n}{sigma\PYZus{}psi\PYZus{}alpha}\PY{p}{,}\PY{n}{sigma2\PYZus{}alpha}\PY{p}{]}  \PY{p}{=} \PY{n}{leave\PYZus{}out\PYZus{}KSS}\PY{p}{(}\PY{n}{y}\PY{p}{,}\PY{n}{id}\PY{p}{,}\PY{n}{firmid}\PY{p}{,}\PY{n}{controls}\PY{p}{)}\PY{p}{;}
\end{Verbatim}
\end{tcolorbox}

    \begin{Verbatim}[commandchars=\\\{\}]
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
Running KSS Correction with the following options
Leave Out Strategy: Leave match out
Algorithm for Computation of Statistical Leverages: JLA with 200 simulations.
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
SECTION 1
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
Info on the leave one out connected set:
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
mean wage: 4.7636
variance of wage: 0.1245
\# of Movers: 6414
\# of Firms: 1684
\# of Person Year Observations: 56044
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
SECTION 2
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
pcg converged at iteration 58 to a solution with relative residual 8.7e-11.
Calculating the statistical leverages of the AKM model{\ldots}
Running JLA Algorithm{\ldots}
Done!
Elapsed time is 4.271175 seconds.
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
SECTION 3
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
PLUG-IN ESTIMATES (BIASED)
Variance of Firm Effects: 0.019479
Covariance of Firm, Person Effects: -0.004008
Variance of Person Effects: 0.10404
Correlation of Firm, Person Effects: -0.089031
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
BIAS CORRECTED ESTIMATES
Variance of Firm Effects: 0.0097812
Covariance of Firm and Person Effects: 0.0046239
Variance of Person Effects: 0.08578
Correlation of Firm, Person Effects: 0.15963
    \end{Verbatim}

    When controls are specified, the code proceeds by partialling them out.
That is, it first estimates by OLS the AKM model in the leave-out connected set

\begin{equation}
y_{gt}=\alpha_{g}+\psi_{j(g,t)}+w_{gt}'\delta+\varepsilon_{gt}
\end{equation}

from which we obtain \(\hat{\delta}\). We then work with a residualized
model where the outcome variable is now defined as
\(y_{gt}^{new}=y_{gt}-w_{gt}'\hat{\delta}\) and project this
residualized outcome on worker and firm indicators and report the
associated (bias-corrected) variance components.

\section{Leaving Out a Person-Year Observation vs.~Leaving Out a
Match}
\label{sec:leave_out_level}

By default, the code reports leave-out corrections for the variance of
firm effects and the covariance of firm and worker effects that are
robust to unrestricted heteroskedasticity and serial correlation of the
error term within a given match (defined as the unique combination of the worker and firm
identifiers); see Remark 3 of KSS. Intuitively, leaving out matches is analogous to ``clustering'' the standard error estimates at the match level. Section~\ref{sec:var_pe} discusses the interpretation of the
leave-out bias-corrected variance of person effects when leaving a match out.


The user can instead run the KSS correction leaving out only a single
observation by setting the option \texttt{leave\_out\_level} to \texttt{obs}.
When the user leaves a person-year observation out, the resulting KSS variance
components are robust to unrestricted heteroskedasticity but not to serial
correlation within a match. Below we demonstrate how to compute KSS-adjusted
variance components when leaving a single (person-year)
observation out.

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{8}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{c}{\PYZpc{}\PYZpc{} Leaving out a Person\PYZhy{}Year Observation vs. Leaving Out a Match }

\PY{n}{leave\PYZus{}out\PYZus{}level}\PY{p}{=}\PY{l+s}{\PYZsq{}}\PY{l+s}{obs\PYZsq{}}\PY{p}{;} \PY{c}{\PYZpc{}leave a single person\PYZhy{}year observation out}
\PY{p}{[}\PY{n}{sigma2\PYZus{}psi}\PY{p}{,}\PY{n}{sigma\PYZus{}psi\PYZus{}alpha}\PY{p}{,}\PY{n}{sigma2\PYZus{}alpha}\PY{p}{]}  \PY{p}{=} \PY{n}{leave\PYZus{}out\PYZus{}KSS}\PY{p}{(}\PY{n}{y}\PY{p}{,}\PY{n}{id}\PY{p}{,}\PY{n}{firmid}\PY{p}{,}\PY{p}{[}\PY{p}{]}\PY{p}{,}\PY{n}{leave\PYZus{}out\PYZus{}level}\PY{p}{)}\PY{p}{;}
\end{Verbatim}
\end{tcolorbox}

    \begin{Verbatim}[commandchars=\\\{\}]
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
Running KSS Correction with the following options
Leave Out Strategy: Leave person-year observation out
Algorithm for Computation of Statistical Leverages: JLA with 200 simulations.
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
SECTION 1
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
Info on the leave one out connected set:
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
mean wage: 4.7636
variance of wage: 0.1245
\# of Movers: 6414
\# of Firms: 1684
\# of Person Year Observations: 56044
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
SECTION 2
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
Calculating the statistical leverages of the AKM model{\ldots}
Running JLA Algorithm{\ldots}
Done!
Elapsed time is 4.700531 seconds.
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
SECTION 3
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
PLUG-IN ESTIMATES (BIASED)
Variance of Firm Effects: 0.019821
Covariance of Firm, Person Effects: -0.0039091
Variance of Person Effects: 0.10354
Correlation of Firm, Person Effects: -0.08629
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
BIAS CORRECTED ESTIMATES
Variance of Firm Effects: 0.010306
Covariance of Firm and Person Effects: 0.0046087
Variance of Person Effects: 0.085253
Correlation of Firm, Person Effects: 0.15548
    \end{Verbatim}

    When \(T=2\) (i.e., the underlying matched employer-employee data spans
only two years), as in this example, it turns out that the KSS-adjusted
variance of firm effects and covariance of firm and worker effects are
robust to any arbitrary correlation between \(\varepsilon_{g2}\) and
\(\varepsilon_{g1}\).

    \hypertarget{variance-of-person-effects-when-leaving-out-a-match}{%
\section{Variance of Person Effects When Leaving Out a
Match}\label{sec:var_pe}}

By leaving a match out, we can bias-correct the variance of firm
effects and the covariance of worker and firm effects while allowing for
unrestricted heteroskedasticity and serial correlation of the error term
\(\varepsilon_{gt}\) within each worker-firm match.

However, the person effects, \(\alpha_{g}\), of ``stayers'' --- workers
that never leave a particular firm --- are not leave-match-out
estimable.\footnote{This is because leaving a match out means leaving out \emph{all} the observations associated with a stayer, and therefore we cannot estimate her $\alpha_{g}$.} This implies that we cannot compute an unbiased estimate of
\(\Omega_{g}=\operatorname{Var}(\varepsilon_{g1},\dots,\varepsilon_{gT_{g}})\) for
stayers. An estimate of \(\Omega_{g}\) for both stayers and movers is
required in order to provide a bias-correction for the variance of
person effects; see Section 1 and Remark 3 in KSS.

The current implementation of the code estimates 
$\Omega_{g}$ for stayers by leaving only a single observation out, that is, by assuming $\Omega_g$ is diagonal. This approach yields an upper bound estimate on
the variance of person effects (computed across both stayers and
movers).

There are several alternatives that the user can explore:

\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\item
  Estimate a variance decomposition in a sample of movers only: For
  movers, it is possible to estimate a leave-out bias-corrected variance
  of person effects that is robust to both unrestricted
  heteroskedasticity and serial correlation in the error term of the AKM
  model within a given match. Therefore, one can provide an unbiased
  variance decomposition of all the three components of the two-way
  fixed effects model by simply feeding to the function
  \texttt{leave\_out\_KSS} a movers-only sample.
\item
  Drop adjacent wage observations for stayers: Under the assumption that
  the errors are serially independent after $m$ periods, it suffices to
  keep every $m$th stayer observation and apply the 
  estimator after leaving a person-year observation out. For example, if \(m=2\) and we have a balanced panel
  with \(T=5\), we can restore independence of the errors in the stayer
  sample by keeping any of the following pairs of stayer time periods:
  (1,4), (2,5), (1,5). One can choose randomly from the available pairs
  for each stayer with equal probability.
 \item 
  Drop interior wage observations for stayers:  To minimize concerns regarding serial correlation, the user can drop all but the first and last wage observations of each stayer. Note that dropping stayer wage observations reduces their weight in the variance components. Future versions of the code will allow the variance components to be defined in terms of weights other than the number of micro-observations.
\end{enumerate}

    \hypertarget{regressing-firm-fixed-effects-on-observables}{%
\section{Regressing Firm Fixed Effects on
Observables}\label{regressing-firm-fixed-effects-on-observables}}
\label{sec:lincom}
It is common in empirical applications to regress the fixed effects
estimated from the two-way model on some observable characteristics.
Using the AKM model again as our leading example, suppose that we are
interested in the linear projection of the firm effects \(\psi_{j(g,t)}\)
on some observables \(Z_{gt}\)

\begin{equation}
\psi_{j(g,t)}=Z_{gt}'\gamma+e_{gt}.
\end{equation}

The standard practice is to estimate \(\gamma\) using a simple regression
where the estimated firm effects, \(\hat{\psi}_{j(g,t)}\), are regressed
on \(Z_{gt}\)

\begin{equation}
\hat{\gamma}=\left(\sum_{g,t}Z_{gt}Z_{gt}'
\right)^{-1}\sum_{g,t}Z_{gt}\hat{\psi}_{j(g,t)}.
\end{equation}

KSS show that inference on \(\hat{\gamma}\) needs to be adjusted because the estimated firm fixed effects $\{\hat{\psi}_{j}\}_{j=1}^{J}$ are correlated with one another.

To see this, suppose that we have a simple AKM model with only two time
periods, set \(w_{gt}=0\), and take first differences
\(\Delta y_{g}\equiv y_{g2}-y_{g1}\) to eliminate the worker fixed
effects so that the AKM model becomes \begin{equation}
\Delta y_{g}=\Delta f_{g}'\psi+\varepsilon_{g},
\end{equation}

where \(\Delta f_{g}=f_{g,2}-f_{g,1}\) and
\(f_{gt}=\{\mathbf{1}_{j(g,t)=1},\dots,\mathbf{1}_{j(g,t)=J}\}\) is the
vector containing the firm dummies. In this model,

\begin{equation}
\hat{\psi}=\psi+\underbrace{\left(\sum_{g=1}^{N}\Delta f_{g}\Delta f_{g}'\right)^{-1}\sum_{g=1}^{N}\Delta f_{g}\varepsilon_{g}}_{\text{Correlated Noise}}.
\end{equation}

Note how the dependence in the vector of estimated firm fixed effects,
\(\hat{\psi}\), is induced by the regressor design
\(\left(\sum_{g=1}^{N}\Delta f_{g}\Delta f_{g}'\right)^{-1}\). As shown in
Table 3 of KSS, ignoring this correlation can easily lead to underestimating standard
errors by an order of magnitude in practice.

The package provides the HU standard errors on \(\hat{\gamma}\)
using the function \texttt{lincom\_KSS}, which is designed to emulate the Stata
function \href{https://www.stata.com/manuals13/rlincom.pdf}{lincom} and therefore works as a post-estimation
command. We demonstrate the functioning of \texttt{lincom\_KSS} with an
example.

In this example, we are interested in testing whether the difference in
person-year weighted mean firm effects between region 1 and region 2 is
statistically different from zero. This amounts to running a regression
where the dependent variable is the vector of estimated firm effects and
the set of observables, \(Z_{gt}\), is here represented by a constant
and a dummy for whether the firm of worker \(g\) in year \(t\) belongs
to region 2.

The resulting coefficient (and standard error) can be computed by
calling the function \texttt{leave\_out\_KSS} specifying that we want to
run the \texttt{lincom} option and using the region dummy as \(Z_{gt}\)
(the constant is automatically added by the code).

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{9}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{c}{\PYZpc{}Regressing firm effects on observables }
\PY{n}{namesrc}\PY{p}{=}\PY{l+s}{\PYZsq{}}\PY{l+s}{data/lincom.csv\PYZsq{}}\PY{p}{;} \PY{c}{\PYZpc{}testing data for the lincom function}
\PY{n}{data}\PY{p}{=}\PY{n}{importdata}\PY{p}{(}\PY{n}{namesrc}\PY{p}{)}\PY{p}{;}
\PY{n}{id}\PY{p}{=}\PY{n}{data}\PY{p}{(}\PY{p}{:}\PY{p}{,}\PY{l+m+mi}{1}\PY{p}{)}\PY{p}{;} 
\PY{n}{firmid}\PY{p}{=}\PY{n}{data}\PY{p}{(}\PY{p}{:}\PY{p}{,}\PY{l+m+mi}{2}\PY{p}{)}\PY{p}{;}
\PY{n}{y}\PY{p}{=}\PY{n}{data}\PY{p}{(}\PY{p}{:}\PY{p}{,}\PY{l+m+mi}{5}\PY{p}{)}\PY{p}{;}
\PY{n}{region}\PY{p}{=}\PY{n}{data}\PY{p}{(}\PY{p}{:}\PY{p}{,}\PY{l+m+mi}{4}\PY{p}{)}\PY{p}{;} \PY{c}{\PYZpc{}Region indicator. Value \PYZhy{}1 for region 1, Value 1 for region 2;}
\PY{n}{region\PYZus{}dummy}\PY{p}{=}\PY{n}{region}\PY{p}{;}
\PY{n}{region\PYZus{}dummy}\PY{p}{(}\PY{n}{region\PYZus{}dummy}\PY{o}{==}\PY{o}{\PYZhy{}}\PY{l+m+mi}{1}\PY{p}{)}\PY{p}{=}\PY{l+m+mi}{0}\PY{p}{;} \PY{c}{\PYZpc{}Make it a proper dummy variable}

\PY{c}{\PYZpc{}Run the KSS correction and \PYZdq{}lincom\PYZdq{}}
\PY{n}{labels\PYZus{}lincom}\PY{p}{=}\PY{p}{\PYZob{}}\PY{l+s}{\PYZsq{}}\PY{l+s}{Region 2 Dummy\PYZsq{}}\PY{p}{\PYZcb{}}\PY{p}{;} \PY{c}{\PYZpc{}give me the label of the columns of Z.}
\PY{n}{lincom\PYZus{}do}\PY{p}{=}\PY{l+m+mi}{1}\PY{p}{;} \PY{c}{\PYZpc{}tell the function leave\PYZus{}out\PYZus{}KSS that we want to project the firm effects on some Z.}
\PY{n}{Z}\PY{p}{=}\PY{n}{region\PYZus{}dummy}\PY{p}{;} \PY{c}{\PYZpc{}we\PYZsq{}re going to project the firm effects on a constant +  the region dummy. Constant automatically added by the code}

\PY{c}{\PYZpc{}Ready to call KSS with lincom option!}
\PY{p}{[}\PY{n}{sigma2\PYZus{}psi}\PY{p}{,}\PY{n}{sigma\PYZus{}psi\PYZus{}alpha}\PY{p}{,}\PY{n}{sigma2\PYZus{}alpha}\PY{p}{]} \PY{p}{=} \PY{n}{leave\PYZus{}out\PYZus{}KSS}\PY{p}{(}\PY{n}{y}\PY{p}{,}\PY{n}{id}\PY{p}{,}\PY{n}{firmid}\PY{p}{,}\PY{p}{[}\PY{p}{]}\PY{p}{,}\PY{p}{[}\PY{p}{]}\PY{p}{,}\PY{p}{[}\PY{p}{]}\PY{p}{,}\PY{p}{[}\PY{p}{]}\PY{p}{,}\PY{n}{lincom\PYZus{}do}\PY{p}{,}\PY{n}{Z}\PY{p}{,}\PY{n}{labels\PYZus{}lincom}\PY{p}{)}\PY{p}{;}
\end{Verbatim}
\end{tcolorbox}

    \begin{Verbatim}[commandchars=\\\{\}]
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
Running KSS Correction with the following options
Leave Out Strategy: Leave match out
Algorithm for Computation of Statistical Leverages: JLA with 200 simulations.
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
SECTION 1
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
Info on the leave one out connected set:
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
mean wage: 4.7047
variance of wage: 0.14653
\# of Movers: 9972
\# of Firms: 2974
\# of Person Year Observations: 89666
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
SECTION 2
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
Calculating the statistical leverages of the AKM model{\ldots}
Running JLA Algorithm{\ldots}
Done!
Elapsed time is 9.324213 seconds.
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
SECTION 3
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
PLUG-IN ESTIMATES (BIASED)
Variance of Firm Effects: 0.060695
Covariance of Firm, Person Effects: -0.012603
Variance of Person Effects: 0.10318
Correlation of Firm, Person Effects: -0.15926
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
BIAS CORRECTED ESTIMATES
Variance of Firm Effects: 0.044613
Covariance of Firm and Person Effects: 0.0025688
Variance of Person Effects: 0.079191
Correlation of Firm, Person Effects: 0.043218
Regressing the firm effects on observables{\ldots}
pcg converged at iteration 115 to a solution with relative residual 8.6e-11.
******************************************
******************************************
RESULTS ON LINCOM
******************************************
******************************************
Coefficient on Region 2 Dummy:  0.25982
Robust "White" Standard Error: 0.050155
KSS Standard error:  0.088374
T-stat: 2.94
******************************************
    \end{Verbatim}

    We can see from the above output  (make sure to scroll until the end)
that the difference in person-year weighted mean firm effects between
the two regions is equal to 0.26. The traditional
HC or ``robust'' standard error on this
coefficient is around 0.05 while the HU standard error derived in KSS is
roughly twice as large (0.09).


    \bibliography{lit}
    

\end{document}
